// Crawl topic URLs from a Douban group I follow and store the recent posts
const db = require('../config/db')
const schedule = require('node-schedule')
const sleep = require('sleep')
var superagent = require('superagent')
require('superagent-proxy')(superagent)
var resolve = require('./analysisHTML')
// Listing-page URL of the target group; the `start` query value is appended below.
const seedUrl = 'https://www.douban.com/group/257523/discussion?start='

// Cut-off timestamp (ms since epoch): posts older than 30 days are not stored.
// `now` is moved back 30 days in place, so after this point it holds the cut-off date.
const now = new Date()
now.setDate(now.getDate() - 30)
const time2 = now.getTime()

// Number of listing pages to crawl on each run.
const PAGE_COUNT = 5
// `start` is an item offset, not a page index. Stepping it by 1 made the five
// pages overlap almost entirely, so the same topics were collected repeatedly.
// NOTE(review): 25 topics per listing page is assumed — confirm against the live site.
const PAGE_SIZE = 25

// Listing pages to fetch: start=0, 25, 50, ...
const crawlUrls = []
for (let page = 0; page < PAGE_COUNT; page++) {
  crawlUrls.push(`${seedUrl}${page * PAGE_SIZE}`)
}

/**
 * Gather topic URLs from every listing page in `crawlUrls`.
 *
 * Pages are requested one at a time, in list order; each response body is
 * handed to `resolve.getTopicUrls` and whatever it yields is appended to a
 * single flat list.
 *
 * @returns {Promise<Array>} all entries produced by `resolve.getTopicUrls`,
 *   in page order (presumably topic URL strings — verify against analysisHTML)
 */
var topicArr = async function () {
  var collected = []
  for (const pageUrl of crawlUrls) {
    const response = await superagent.get(pageUrl)
    const pageTopics = resolve.getTopicUrls(response.text)
    // `apply` appends all entries at once and tolerates a null/undefined result.
    collected.push.apply(collected, pageTopics)
  }
  return collected
}

/**
 * Crawl every topic returned by `topicArr()` and store the recent ones.
 *
 * For each topic URL the page is fetched and parsed via `getRentInfo`. A topic
 * is saved as a `db.Article` when its title is non-empty and its `publicData`
 * value is greater than the `time2` cut-off (presumably the publish time in
 * ms — verify against analysisHTML).
 *
 * Failures are logged instead of thrown: a failure to build the topic list
 * ends the run, while a failure on a single topic only skips that topic.
 *
 * @returns {Promise<void>} settles when the run is over; never rejects
 */
async function topicInfos () {
  // Pause between two topic requests to keep the request rate low.
  const REQUEST_INTERVAL_MS = 30 * 1000

  let urls
  try {
    urls = await topicArr()
  } catch (err) {
    console.error('获取帖子列表失败', err)
    return
  }

  for (let i = 0; i < urls.length; i++) {
    try {
      const rentInfo = await getRentInfo(urls[i])
      if (rentInfo.title !== '' && time2 < rentInfo.publicData) {
        // Wait for the write so success is only reported once it has finished.
        await new db.Article(rentInfo).save()
        console.log('插入成功')
      }
    } catch (err) {
      console.error(`处理帖子失败: ${urls[i]}`, err)
    }
    // Timer-based pause keeps the event loop free for pending I/O;
    // no pause is needed after the last topic.
    if (i < urls.length - 1) {
      await new Promise((done) => setTimeout(done, REQUEST_INTERVAL_MS))
    }
  }
  console.log('插入完成')
}

/**
 * Download one topic page and parse it.
 *
 * @param {string} url - address of the topic page to request
 * @returns {Promise<*>} whatever `resolve.getTopicInfo` returns for the
 *   response body and the URL
 */
async function getRentInfo (url) {
  const { text } = await superagent.get(url)
  return resolve.getTopicInfo(text, url)
}

// Scheduled execution is disabled; the crawl runs once when this file is loaded.
// NOTE(review): the disabled line below calls topicInfos() on the spot and hands
// its return value to scheduleJob. If it is re-enabled, it presumably needs the
// function itself (`topicInfos`) — confirm against the node-schedule docs, and
// double-check the cron expression while doing so.
// schedule.scheduleJob('* 30 * * * *', topicInfos())
topicInfos()
